* Start from a clean session: -clear all- drops data and previously defined
* programs, so the -program- definitions below can be declared again on re-run.
clear all
* Do not pause output with --more-- prompts while the script runs.
set more off

program main
    * Driver for the robustness figures.
    *
    * Builds a site-by-month panel of Facebook/Twitter shares for the sites in
    * FakeNewsSites.txt, defines a series of 0/1 subset indicators, and calls
    * plot_shares once per indicator (one exported figure per call). Finally
    * repeats the plot for every site in SitePolitical.dta.
    *
    * Takes no arguments. Reads FakeNewsSites.txt, StartEndFake.txt,
    * SiteFake.dta and SitePolitical.dta relative to the working directory.

    * Prepare data
    * case(preserve) keeps mixed-case column names such as fake_PolitiFact,
    * which are referenced by name further down.
    import delimited "FakeNewsSites.txt", delimiter("|") case(preserve) clear
    rename url site
    tempfile list
    save `list'
    import delimited "StartEndFake.txt", clear delimiter("|")
    rename url site
    * Parse the start/end strings into Stata daily dates.
    gen d1 = date(start, "YMD")
    gen d2 = date(end, "YMD")
    format d1 d2 %d
    tempfile time
    save `time'
    use "SiteFake.dta", clear
    collapse (sum) SharesFacebook SharesTwitter, by(site quarter yrmo)
    * keep(2 3): drop sites that are not on the list, but keep listed sites
    * even if they have no rows in SiteFake.dta.
    merge m:1 site using `list', keep(2 3) nogen
    * keep(1 3): start/end information is optional; unmatched sites get missing d1/d2.
    merge m:1 site using `time', keep(1 3) nogen

    * Multiple sources
    * NOTE(review): presumably `fake' counts how many source lists name the site - confirm.
    * Stata treats missing as larger than any number, so `fake >= 2' alone would
    * flag sites with missing `fake'; guard explicitly, as the date and rank
    * indicators below already do.
    gen Source2 = fake >= 2 & !missing(fake)
    gen Source3 = fake >= 3 & !missing(fake)
    plot_shares, subset("Source2") max_fb(45)  max_tw(1.8)  max_r(120)
    plot_shares, subset("Source3") max_fb(4.5) max_tw(0.12) max_r(150)

    * Time coverage
    quietly sum length
    * LastLong: rows whose `length' equals the maximum observed value.
    gen LastLong      = (length == r(max))
    gen AfterElection = d1 >= d(08nov2016) & d1 != .
    gen ActiveNow     = d2 >= d(31jul2018) & d2 != .
    plot_shares, subset("AfterElection") max_fb(30)  max_tw(0.45) max_r(90)
    plot_shares, subset("ActiveNow")     max_fb(120) max_tw(6)    max_r(45)
    plot_shares, subset("LastLong")      max_fb(60)  max_tw(4.5)  max_r(45)

    * Size of engagements
    * Rank sites by total shares over 2015m1-2018m7 on a temporary copy of the
    * data, then merge rank and decile back onto the panel.
    preserve
    drop if missing(SharesFacebook) & missing(SharesTwitter)
    keep if yrmo >= m(2015m1) & yrmo <= m(2018m7)
    collapse (sum) SharesFacebook SharesTwitter, by(site)
    gen SharesTotal = SharesFacebook + SharesTwitter
    * Descending sort: SharesRank 1 is the site with the largest total
    * (ties broken by site name).
    gsort -SharesTotal site
    gen SharesRank = _n
    xtile SharesDecile = SharesTotal, nq(10)
    tempfile temp
    save `temp'
    restore
    * Sites dropped inside the preserve block stay unmatched (missing rank/decile).
    merge m:1 site using `temp', assert(1 3) nogen
    gen ExcludeTop5 = (SharesRank > 5 & SharesRank != .)
    * xtile gives category 10 to the largest totals, so Decile1 is the top decile.
    gen Decile1     = SharesDecile == 10
    gen DecileRest  = SharesDecile <= 9
    plot_shares, subset("ExcludeTop5") max_fb(150) max_tw(3.6)  max_r(75)
    plot_shares, subset("Decile1")     max_fb(150) max_tw(4.5)  max_r(60)
    plot_shares, subset("DecileRest")  max_fb(36)  max_tw(0.75) max_r(60)

    * Likelihood to publish misinformation (Lazar et al. 2018)
    * These indicator variables come straight from the site list file.
    plot_shares, subset("fake_Lazar_black")  max_fb(75) max_tw(0.6) max_r(180)
    plot_shares, subset("fake_Lazar_red")    max_fb(45) max_tw(1.8) max_r(60)
    plot_shares, subset("fake_Lazar_orange") max_fb(45) max_tw(2.4) max_r(45)

    * Excluding particular sources
    * Exclude<source> is 0 only for sites flagged by <source> with fake == 1,
    * and 1 for every other site.
    foreach source in PolitiFact FactCheck BuzzFeed GuessNyhanReifler {
        gen Exclude`source' = 1 - (fake_`source' == 1 & fake == 1)
    }
    * ExcludeLazar: sites flagged by at least one of the four lists above.
    * rowtotal() treats a missing flag as 0, so a missing flag can neither turn
    * the sum into missing (which `>= 1' would count as true) nor hide a flag
    * that is present. With no missing flags this equals the plain sum.
    tempvar n_lists
    egen `n_lists' = rowtotal(fake_PolitiFact fake_FactCheck fake_BuzzFeed fake_GuessNyhanReifler)
    gen ExcludeLazar = `n_lists' >= 1
    plot_shares, subset("ExcludePolitiFact")        max_fb(150) max_tw(4.5) max_r(60)
    plot_shares, subset("ExcludeFactCheck")         max_fb(180) max_tw(6)   max_r(60)
    plot_shares, subset("ExcludeBuzzFeed")          max_fb(180) max_tw(6)   max_r(60)
    plot_shares, subset("ExcludeGuessNyhanReifler") max_fb(150) max_tw(4.5) max_r(60)
    plot_shares, subset("ExcludeLazar")             max_fb(120) max_tw(2.4) max_r(90)

    * Political sites
    * Every row of this file is plotted: the indicator is set to 1 throughout.
    use "SitePolitical.dta", clear
    gen Political = 1
    plot_shares, subset("Political") max_fb(24) max_tw(3.6) max_r(15)
end

program plot_shares
    * Export a three-panel trend figure (Facebook, Twitter, Facebook/Twitter
    * ratio) for the rows where the indicator variable `subset' equals 1.
    *
    * Options (all required):
    *   subset(string)  name of a 0/1 variable in memory; also used as the
    *                   suffix of the exported file name
    *   max_fb(string)  top of the y axis in the Facebook panel
    *   max_tw(string)  top of the y axis in the Twitter panel
    *   max_r(string)   top of the y axis in the ratio panel
    *
    * Writes TrendRobustness<subset>.eps. The data in memory are left as they
    * were on entry (preserve/restore).
    syntax, subset(string) max_fb(string) max_tw(string) max_r(string)

    preserve
    keep if `subset' == 1 & ~missing(SharesFacebook) & ~missing(SharesTwitter)
    keep if yrmo >= m(2015m1) & yrmo <= m(2018m7)
    * The AfterElection subset is plotted only from 2016m11 onwards.
    if "`subset'" == "AfterElection" {
        drop if yrmo < m(2016m11)
    }

    * Nothing to plot: stop with the usual "no observations" return code, but
    * say which subset was empty. -preserve- restores the data automatically
    * when the program exits.
    if _N == 0 {
        display as error "plot_shares: no observations left for subset `subset'"
        exit 2000
    }

    * encode numbers the distinct site strings 1, 2, ..., so the maximum code
    * is the number of distinct sites. Run quietly so each call does not print
    * a summary table.
    encode site, gen(site_num)
    quietly summarize site_num, meanonly
    local N = r(max)

    * Total shares across sites per month, then the mean monthly total within
    * each quarter, expressed in millions.
    collapse (sum) SharesFacebook SharesTwitter, by(quarter yrmo)
    collapse (mean) SharesFacebook SharesTwitter, by(quarter)
    replace SharesFacebook = SharesFacebook / 1000000
    replace SharesTwitter  = SharesTwitter  / 1000000
    gen ratio = SharesFacebook / SharesTwitter

    plot_trend, var(SharesFacebook) name(facebook) max(`max_fb') ytitle("Facebook engagements (million)")
    plot_trend, var(SharesTwitter)  name(twitter)  max(`max_tw') ytitle("Twitter shares (million)")
    plot_trend, var(ratio)          name(ratio)    max(`max_r')  ytitle("Facebook engagements / Twitter shares")

    * plot_trend stores each panel as a graph named g_<name>; combine them in one row.
    graph combine g_facebook g_twitter g_ratio, rows(1) graphregion(color(white)) xsize(10) ysize(3) ///
      note("Number of sites: `N'", size(6))
    graph export "TrendRobustness`subset'.eps", replace
    restore
end

program plot_trend
    * Draw a line chart of `var' against `quarter' and keep it in memory as
    * the named graph g_<name>, replacing any existing graph of that name.
    *
    * Options (all required):
    *   var(string)     variable plotted on the y axis
    *   name(string)    suffix of the stored graph name (g_<name>)
    *   max(string)     largest y-axis label; labels run from 0 to max
    *   ytitle(string)  y-axis title text
    syntax, var(string) name(string) max(string) ytitle(string)

    * Three equal steps between 0 and max, i.e. four y-axis labels.
    local step = `max' / 3

    twoway line `var' quarter, ///
      name(g_`name', replace) ///
      graphregion(color(white)) ///
      xtitle("") ///
      tlabel(2015q1(4)2018q1, angle(0) format(%tqCY) tlength(*2) labsize(7)) ///
      tmtick(##4) ///
      ytitle("`ytitle'", size(6)) ///
      ylabel(0(`step')`max', angle(0) labsize(7))
end

* Entry point: run the pipeline defined in -program main-.
main
